/**********************************************************************
DO FILE SUMMARY

Rural-Urban Differences in Diabetes Care and Control in 40 Low- and Middle-Income
Countries: A Cross-Sectional Study of Nationally Representative, Individual-Level Data

Analysis and appendix file

David Flood, for the HPACC collaborators
University of Michigan

September 19, 2021
**********************************************************************/

////////////////////////////////////////////////////////////////////////////////
//////////////////////////////// WEQ ANALYSIS //////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
	
		
/*******************************************************************************
WEQ REGRESSIONS OVERALL
*******************************************************************************/

* Overall ----------------------------------------------------------------------

foreach outcome of varlist clin_dia $dependent_vars {			

		* Generate sample
		gen sample = 1 if analytic_sample == 1 & `outcome' != . & rural != . & sex != .

		* Rescaling weights
			* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			* gen wpopnr_sample=w_fbg*Population2015/sum_wpop_sample

			bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			gen weqnr_sample=w_fbg/sum_weq_sample
		
		* Make age spline
			mkspline age_spline = age if sample == 1, cubic nknots(5) displayknots
		
		* Set svyset
		svyset psu_num[pw = weqnr_sample], strata(stratum_num) singleunit(centered) // note weights are not rescaled
		
		* Raw proportions
		svy, subpop(sample): proportion `outcome', over(rural)
		matlist r(table)
		matrix weq_prop_`outcome' = r(table)
		
		* Regression
		svy, subpop(sample): poisson `outcome' i.rural c.age_spline* i.country_encoded, irr baselevels
		matlist r(table)
		matrix weq_rr_`outcome' = r(table)
		
		* Predictive margins
		margins rural, vce(unconditional)
		matlist r(table)
		matrix weq_pred_`outcome' = r(table)
										
		* dydx margins
		margins, dydx(rural) vce(unconditional)
		matlist r(table)
		matrix weq_dydx_`outcome' = r(table)
		
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample age_spline*
		}	
		
* By sex -----------------------------------------------------------------------

foreach sex of numlist 0(1)1 {
	
	foreach outcome of varlist clin_dia $dependent_vars {			

		* Generate sample
		gen sample = 1 if analytic_sample == 1 & `outcome' != . & rural != . & sex == `sex'

		* Rescaling weights
			* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			* gen wpopnr_sample=w_fbg*Population2015/sum_wpop_sample

			bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			gen weqnr_sample=w_fbg/sum_weq_sample
		
		* Make age spline
			mkspline age_spline = age if sample == 1, cubic nknots(5) displayknots
		
		* Set svyset
		svyset psu_num[pw = weqnr_sample], strata(stratum_num) singleunit(centered) // note weights are not rescaled
		
		* Raw proportions
		svy, subpop(sample): proportion `outcome', over(rural)
		matlist r(table)
		matrix weq_prop_`outcome'_sex`sex' = r(table)
		
		* Regression
		svy, subpop(sample): poisson `outcome' i.rural c.age_spline* i.country_encoded, irr baselevels
		matlist r(table)
		matrix weq_rr_`outcome'_sex`sex' = r(table)
		
		* Predictive margins
		margins rural, vce(unconditional)
		matlist r(table)
		matrix weq_pred_`outcome'_sex`sex' = r(table)
										
		* dydx margins
		margins, dydx(rural) vce(unconditional)
		matlist r(table)
		matrix weq_dydx_`outcome'_sex`sex' = r(table)
		
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample age_spline*
	}
}
		
/*******************************************************************************
WEQ RR FOREST PLOTS - OVERALL
*******************************************************************************/

* 1. Generating the spreadsheet using putexcel ---------------------------------

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the aggregate weq regression output.
	*/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
putexcel set "weq rr forestplot ${date}.xlsx", replace 

* Making Excel frame
putexcel A1 = "name"
putexcel A2 = "Ever tested"
putexcel A3 = "Awareness"
putexcel A4 = "Glucose-lowering med"
putexcel A5 = "BP-lowering med"
putexcel A6 = "Statin"
putexcel A7 = "Glycemic control"
putexcel A8 = "BP control"
putexcel A9 = "Cholesterol control"
putexcel A10 = "Combined AB control"
putexcel A11 = "Combined ABC control"
	
putexcel B1 = "est_rr"
putexcel C1 = "lci_rr"
putexcel D1 = "uci_rr"
putexcel E1 = "est_dydx"
putexcel F1 = "lci_dydx"
putexcel G1 = "uci_dydx"
putexcel H1 = "p_value"
putexcel I1 = "est_urban_pred"
putexcel J1 = "lci_urban_pred"
putexcel K1 = "uci_urban_pred"
putexcel L1 = "est_rural_pred"
putexcel M1 = "lci_rural_pred"
putexcel N1 = "uci_rural_pred"

* Exporting to Excel

local n = 1

foreach outcome in $dependent_vars {	
	
	local n = `n' + 1
	
	matlist weq_rr_`outcome'
	matrix results = weq_rr_`outcome'

	local b = results[1,2]
	local lci = results[5,2]
	local uci = results[6,2]
	local p = results[4,2]

	putexcel B`n' = `b'
	putexcel C`n' = `lci'
	putexcel D`n' = `uci'
	putexcel H`n' = `p'
	
	matlist weq_dydx_`outcome'
	matrix results = weq_dydx_`outcome'

	local b = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100

	putexcel E`n' = `b'
	putexcel F`n' = `lci'
	putexcel G`n' = `uci'
	
	matlist weq_pred_`outcome'
	matrix results = weq_pred_`outcome'
	
	putexcel I`n' = results[1,1]*100
	putexcel J`n' = results[5,1]*100
	putexcel K`n' = results[6,1]*100
	putexcel L`n' = results[1,2]*100
	putexcel M`n' = results[5,2]*100
	putexcel N`n' = results[6,2]*100

}

* 2. Generating weq rr forestplots ---------------------------------------------

frame create weq_forestplot
frame change weq_forestplot
	
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables/weq rr forestplot $date.xlsx", sheet("Sheet1") firstrow cellrange(A1:N11) clear

insobs 1, before(1)
insobs 2, after(3)
insobs 2, after(8)

gen id = _n

replace name = "{bf:Diagnosis}" if id == 1
replace name = " " if id == 4
replace name = "{bf:Treatment}" if id == 5
replace name = " " if id == 9
replace name = "{bf:Control}" if id == 10

* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(id,1,5,10)
replace _USE = 6 if missing(name)

* Transforming for eform display in forestplot
gen est_rr_exp = log(est_rr)
gen lci_rr_exp = log(lci_rr)
gen uci_rr_exp = log(uci_rr)

* Making string variables	
gen pvalue_s = string(p_value,"%4.3f")
replace pvalue_s = "<0.001" if p_value <0.001
replace pvalue_s = "" if p_value == .

gen est_rr_s = string(est_rr,"%9.2f")
gen lci_rr_s = string(lci_rr,"%9.2f")
gen uci_rr_s = string(uci_rr,"%9.2f")
gen output_rr_s = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace output_rr_s = "" if est_rr == .

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")
gen output_dydx_s = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace output_dydx_s = "" if est_dydx == .

gen est_urban_pred_s = string(est_urban_pred,"%9.1f")
gen lci_urban_pred_s = string(lci_urban_pred,"%9.1f")
gen uci_urban_pred_s = string(uci_urban_pred,"%9.1f")
gen output_urban_pred_s = est_urban_pred_s + " (" + lci_urban_pred_s + " to " + uci_urban_pred_s + ")"
replace output_urban_pred_s = "" if est_urban_pred == .

gen est_rural_pred_s = string(est_rural_pred,"%9.1f")
gen lci_rural_pred_s = string(lci_rural_pred,"%9.1f")
gen uci_rural_pred_s = string(uci_rural_pred,"%9.1f")
gen output_rural_pred_s = est_rural_pred_s + " (" + lci_rural_pred_s + " to " + uci_rural_pred_s + ")"
replace output_rural_pred_s = "" if est_rural_pred == .

* Realiigning
leftalign

* Adding bolded labels		
label var name `"`"{bf:Outcome}"'"'
label var output_rr_s `"`"{bf:Risk ratio}"'"'
label var output_dydx_s `"`"{bf:Average marginal}"' `"{bf:effect (%)}"'"'
label var pvalue_s `"`"{bf:P value}"'"'
label var output_urban_pred_s `"`"{bf:Urban adjusted}"' `"{bf:proportion (%)}"'"'
label var output_rural_pred_s `"`"{bf:Rural adjusted}"' `"{bf:proportion (%)}"'"'

format %-6s pvalue_s

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

local line_size ".2rs"
local forest_font_size "2.1"
local arrow_font_size "2"	

forestplot est_rr_exp lci_rr_exp uci_rr_exp, ///
	labels(name) ///
	eform ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output_rr_s output_dydx_s pvalue_s output_urban_pred_s output_rural_pred_s) /// this tells forestplot which columsn to display
	range(0.25 1.8) /// range of the plot
	xlabel(0.25 0.5 1 2, labsize(2)) /// plot labels xmtick(0.3(0.1)1.5) ///
	astext(85) ///
	spacing(1.5) /// this refers to spacing by line
	/// style options
		diamopts(lwidth(`line_size')) ///
		boxopts(mcolor(none)) ///
		ciopts(lwidth(`line_size')) ///
		nlineopts(lwidth(`line_size')) ///
		olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(0.25rs)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) /// 	
	ysize(3) ///
	xtitle("Risk ratio", size(`forest_font_size')  margin(l=0 r=60 b=0 t=1)) ///
	/// favours("Less rural     " "achievement      " # "     More rural" "     achievement", labsize(2.1rs) nosymmetric) ///
	name(weq_forest_plot,replace) //

* Fixing formatting issues	----------------------------------------------------

* Size of text in forestplot
	gr_edit plotregion1.plot5.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot6.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot7.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot8.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot9.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot10.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot11.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
		
* Top black line
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

* Arrows and labels for "favors"
gr_edit .AddLine added_lines editor 40 17 50 17
		gr_edit .added_lines_new = 1
		gr_edit .added_lines_rec = 1
		gr_edit .added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[1].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[1].style.editstyle headpos(head) editcopy

		gr_edit .AddLine added_lines editor 34 17 24 17
		gr_edit .added_lines_new = 2
		gr_edit .added_lines_rec = 2
		gr_edit .added_lines[2].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[2].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[2].style.editstyle headpos(head) editcopy

		gr_edit .AddTextBox added_text editor 15 39
		gr_edit .added_text_new = 1
		gr_edit .added_text_rec = 1
		gr_edit .added_text[1].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(right) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[1].style.editstyle size(`arrow_font_size') editcopy
		gr_edit .added_text[1].text = {}
		gr_edit .added_text[1].text.Arrpush `"{it:Greater in rural}"'

		gr_edit .AddTextBox added_text editor 15 22
		gr_edit .added_text_new = 2
		gr_edit .added_text_rec = 2
		gr_edit .added_text[2].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[2].style.editstyle size(`arrow_font_size`'') editcopy
		gr_edit .added_text[2].text = {}
		gr_edit .added_text[2].text.Arrpush `"{it:Lower in rural}"'

	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Figures"
	graph save "weq_forest_plot_${date}.gph", replace         
	graph export "weq_forest_plot_${date}.pdf", as(pdf) name(weq_forest_plot) replace
		
frame change default
frame drop weq_forestplot

/*******************************************************************************
WEQ RR FOREST PLOTS - BY SEX
*******************************************************************************/

* 1. Generating the spreadsheet using putexcel ---------------------------------

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the aggregate weq regression output.
	*/

foreach sex of numlist 0(1)1 {	
	
	* Create the Excel doc
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
	putexcel set "weq rr forestplot sex`sex' ${date}.xlsx", replace 

	* Making Excel frame
	putexcel A1 = "name"
	putexcel A2 = "Ever tested"
	putexcel A3 = "Awareness"
	putexcel A4 = "Glucose-lowering med"
	putexcel A5 = "BP-lowering med"
	putexcel A6 = "Statin"
	putexcel A7 = "Glycemic control"
	putexcel A8 = "BP control"
	putexcel A9 = "Cholesterol control"
	putexcel A10 = "Combined AB control"
	putexcel A11 = "Combined ABC control"
		
	putexcel B1 = "est_rr"
	putexcel C1 = "lci_rr"
	putexcel D1 = "uci_rr"
	putexcel E1 = "est_dydx"
	putexcel F1 = "lci_dydx"
	putexcel G1 = "uci_dydx"
	putexcel H1 = "p_value"
	putexcel I1 = "est_urban_pred"
	putexcel J1 = "lci_urban_pred"
	putexcel K1 = "uci_urban_pred"
	putexcel L1 = "est_rural_pred"
	putexcel M1 = "lci_rural_pred"
	putexcel N1 = "uci_rural_pred"

	* Exporting to Excel

	local n = 1

	foreach outcome in $dependent_vars {	
		
		local n = `n' + 1
		
		matlist weq_rr_`outcome'_sex`sex'
		matrix results = weq_rr_`outcome'_sex`sex'

		local b = results[1,2]
		local lci = results[5,2]
		local uci = results[6,2]
		local p = results[4,2]

		putexcel B`n' = `b'
		putexcel C`n' = `lci'
		putexcel D`n' = `uci'
		putexcel H`n' = `p'
		
		matlist weq_dydx_`outcome'_sex`sex'
		matrix results = weq_dydx_`outcome'_sex`sex'

		local b = results[1,2]*100
		local lci = results[5,2]*100
		local uci = results[6,2]*100

		putexcel E`n' = `b'
		putexcel F`n' = `lci'
		putexcel G`n' = `uci'
		
		matlist weq_pred_`outcome'_sex`sex'
		matrix results = weq_pred_`outcome'_sex`sex'
		
		putexcel I`n' = results[1,1]*100
		putexcel J`n' = results[5,1]*100
		putexcel K`n' = results[6,1]*100
		putexcel L`n' = results[1,2]*100
		putexcel M`n' = results[5,2]*100
		putexcel N`n' = results[6,2]*100

	}
}
	
* 2. Generating weq rr forestplots ---------------------------------------------

frame create weq_forestplot
frame change weq_forestplot
	
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper"
import excel "Putexcel tables/weq rr forestplot sex1 $date.xlsx", sheet("Sheet1") firstrow cellrange(A1:N11) clear
gen id = _n
save "Temporary datasets/weq rr forestplot sex1 $date.dta", replace

import excel "Putexcel tables/weq rr forestplot sex0 $date.xlsx", sheet("Sheet1") firstrow cellrange(A1:N11) clear
gen id = _n
append using "Temporary datasets/weq rr forestplot sex1 $date.dta", nolabel gen(sex)
	
* Sorting
sort id sex	

* Dropping and replacing duplicate headers
replace name = "" if sex == 1

insobs 1, before(1)
insobs 2, after(5)
insobs 2, after(13)

replace name = "{bf:Diagnosis}" if _n == 1
replace name = " " if _n == 6
replace name = "{bf:Treatment}" if _n  == 7
replace name = " " if _n == 14
replace name = "{bf:Control}" if _n == 15

* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(_n,1,7,15)
replace _USE = 6 if inlist(_n,6,14)

* Transforming for eform display in forestplot
gen est_rr_exp = log(est_rr)
gen lci_rr_exp = log(lci_rr)
gen uci_rr_exp = log(uci_rr)

* Making string variables	
gen pvalue_s = string(p_value,"%4.3f")
replace pvalue_s = "<0.001" if p_value <0.001
replace pvalue_s = "" if p_value == .

gen est_rr_s = string(est_rr,"%9.2f")
gen lci_rr_s = string(lci_rr,"%9.2f")
gen uci_rr_s = string(uci_rr,"%9.2f")
gen output_rr_s = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace output_rr_s = "" if est_rr == .

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")
gen output_dydx_s = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace output_dydx_s = "" if est_dydx == .

gen est_urban_pred_s = string(est_urban_pred,"%9.1f")
gen lci_urban_pred_s = string(lci_urban_pred,"%9.1f")
gen uci_urban_pred_s = string(uci_urban_pred,"%9.1f")
gen output_urban_pred_s = est_urban_pred_s + " (" + lci_urban_pred_s + " to " + uci_urban_pred_s + ")"
replace output_urban_pred_s = "" if est_urban_pred == .

gen est_rural_pred_s = string(est_rural_pred,"%9.1f")
gen lci_rural_pred_s = string(lci_rural_pred,"%9.1f")
gen uci_rural_pred_s = string(uci_rural_pred,"%9.1f")
gen output_rural_pred_s = est_rural_pred_s + " (" + lci_rural_pred_s + " to " + uci_rural_pred_s + ")"
replace output_rural_pred_s = "" if est_rural_pred == .

* Realiigning
leftalign

* Adding bolded labels		
label var name `"`"{bf:Performance}"' `"{bf:measure}"'"'
label var output_rr_s `"`"{bf:Risk ratio}"'"'
label var output_dydx_s `"`"{bf:Average marginal}"' `"{bf:effect (%)}"'"'
label var pvalue_s `"`"{bf:P value}"'"'
label var output_urban_pred_s `"`"{bf:Urban adjusted}"' `"{bf:proportion (%)}"'"'
label var output_rural_pred_s `"`"{bf:Rural adjusted}"' `"{bf:proportion (%)}"'"'

format %-6s pvalue_s

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
	
	Changing colors to lancet theme:
	
	https://rdrr.io/cran/ggsci/man/pal_lancet.html

	hex to RGB converer: https://www.rapidtables.com/convert/color/hex-to-rgb.html
	
	*/
	local lancet_blue "0 70 139" // #00468BFF
	local lancet_red "237 0 0 " // #ED0000FF
	

local line_size ".2rs"
local forest_font_size "1.8"
local arrow_font_size "1.8"	

forestplot est_rr_exp lci_rr_exp uci_rr_exp, ///
	labels(name) ///
	eform ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output_rr_s output_dydx_s pvalue_s output_urban_pred_s output_rural_pred_s) /// this tells forestplot which columsn to display
	range(0.25 1.8) /// range of the plot
	xlabel(0.25 0.5 1 2, labsize(`forest_font_size')) /// plot labels xmtick(0.3(0.1)1.5) ///
	astext(85) ///
	spacing(1.5) /// this refers to spacing by line
	/// style options
		diamopts(lwidth(`line_size')) ///
		boxopts(mcolor(none)) ///
		ciopts(lwidth(`line_size')) ///
		nlineopts(lwidth(`line_size')) ///
		olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(0.25rs)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	plotid(sex) ///
		box1opts(mcolor(none)) ci1opts(lcolor(navy)) point1opts(mcolor(navy)) diam1opts(color(navy)) ///
		box2opts(mcolor(none)) ci2opts(lcolor(red)) point2opts(mcolor(red)) diam2opts(color(red)) ///
	xsize(4) ///
 	ysize(3.5) ///
	xtitle("Risk ratio", size(`forest_font_size')  margin(l=0 r=51 b=0 t=1)) ///
	/// favours("Less rural     " "achievement      " # "     More rural" "     achievement", labsize(2.1rs) nosymmetric) ///
	name(weq_forest_plot_sex,replace) //

* Fixing formatting issues	----------------------------------------------------

* Colors
	* Blue
	gr_edit .plotregion1.plot7.style.editstyle marker(fillcolor(`lancet_blue')) editcopy
	gr_edit .plotregion1.plot7.style.editstyle marker(linestyle(color(`lancet_blue'))) editcopy
	gr_edit .plotregion1.plot3.style.editstyle area(linestyle(color(`lancet_blue'))) editcopy
	
	* Red
	gr_edit .plotregion1.plot8.style.editstyle marker(fillcolor(`lancet_red')) editcopy
	gr_edit .plotregion1.plot8.style.editstyle marker(linestyle(color(`lancet_red'))) editcopy
	gr_edit .plotregion1.plot4.style.editstyle area(linestyle(color(`lancet_red'))) editcopy
	gr_edit .plotregion1.plot5.style.editstyle marker(linestyle(color(`lancet_red'))) editcopy

* Size of text in forestplot
	gr_edit plotregion1.plot8.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot9.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot10.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot11.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot12.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot13.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot14.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	
	
* Top black line
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

* Arrows and labels for "favors"
gr_edit .AddLine added_lines editor 35 8 45 8
		gr_edit .added_lines_new = 1
		gr_edit .added_lines_rec = 1
		gr_edit .added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[1].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[1].style.editstyle headpos(head) editcopy

		gr_edit .AddLine added_lines editor 29 8 19 8
		gr_edit .added_lines_new = 2
		gr_edit .added_lines_rec = 2
		gr_edit .added_lines[2].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[2].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[2].style.editstyle headpos(head) editcopy

		gr_edit .AddTextBox added_text editor 6 33
		gr_edit .added_text_new = 1
		gr_edit .added_text_rec = 1
		gr_edit .added_text[1].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(right) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[1].style.editstyle size(`arrow_font_size') editcopy
		gr_edit .added_text[1].text = {}
		gr_edit .added_text[1].text.Arrpush `"{it:Greater in rural}"'

		gr_edit .AddTextBox added_text editor 6 19
		gr_edit .added_text_new = 2
		gr_edit .added_text_rec = 2
		gr_edit .added_text[2].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[2].style.editstyle size(`arrow_font_size`'') editcopy
		gr_edit .added_text[2].text = {}
		gr_edit .added_text[2].text.Arrpush `"{it:Lower in rural}"'
		
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Figures"
	graph save "weq_forest_plot_sex_${date}.gph", replace         
	graph export "weq_forest_plot_sex_${date}.pdf", as(pdf) name(weq_forest_plot_sex) replace
		
frame change default
frame drop weq_forestplot

/*******************************************************************************
WEQ PREDICTIVE MARGINS PLOTS
*******************************************************************************/

* 1. Generating the spreadsheet using putexcel ---------------------------------

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the aggregate weq regression output.
	*/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
putexcel set "weq pred plot ${date}.xlsx", replace 

* Making Excel frame
putexcel A1 = "name"
putexcel A2 = "Ever tested"
putexcel A3 = "Awareness"
putexcel A4 = "Glucose-lowering med"
putexcel A5 = "BP-lowering med"
putexcel A6 = "Statin"
putexcel A7 = "Glycemic control"
putexcel A8 = "BP control"
putexcel A9 = "Cholesterol control"
putexcel A10 = "Combined AB control"
putexcel A11 = "Combined ABC control"

putexcel B1 = "est_pred_urban"
putexcel C1 = "lci_pred_urban"
putexcel D1 = "uci_pred_urban"
putexcel E1 = "est_pred_rural"
putexcel F1 = "lci_pred_rural"
putexcel G1 = "uci_pred_rural"

* Exporting to Excel

local n = 1

foreach outcome in $dependent_vars {	
	
	local n = `n' + 1
	
	matlist weq_pred_`outcome'
	matrix results = weq_pred_`outcome'

	putexcel B`n' = results[1,1]*100
	putexcel C`n' = results[5,1]*100
	putexcel D`n' = results[6,1]*100
	putexcel E`n' = results[1,2]*100
	putexcel F`n' = results[5,2]*100
	putexcel G`n' = results[6,2]*100

}

frame create pred_forestplot
frame change pred_forestplot

import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables/weq pred plot ${date}.xlsx", sheet("Sheet1") firstrow cellrange(A1:G11) clear

local bar_width "0.30"

gen id = _n
gen id_urban = id - `bar_width'/2
gen id_rural = id + `bar_width'/2

replace name = "Glucose- lowering med" if name == "Glucose-lowering med"

* gen blank y value to help with x value labels of id_rural
gen blank = .

* Encodes for the valuelabel
labmask id, values(name) // This encodes the id variable using the ccode labels
* splitvallabels id, length(10) 
tab id

local bar_text "4.5rs"

splitvallabels id , length(10) nobreak
graph twoway ///
	scatter blank id, ///
		xlabel(`r(relabel)', noticks labsize(`bar_text')) 	|| ///
	bar est_pred_urban id_urban, ///
		fcolor(gs7) ///
		lcolor(black) ///
		msize(1rs) ///
		barw(`bar_width') || ///
	rcap  lci_pred_urban uci_pred_urban id_urban, ///
		lcolor(black) ///
		msize(1rs)	||	 ///
	bar est_pred_rural id_rural, ///
		fcolor(gs14) ///
		lcolor(black) ///
		msize(1rs) ///
		barw(`bar_width') || ///
	rcap  lci_pred_rural uci_pred_rural id_rural, ///
		lcolor(black) ///
		msize(1rs)			 ///
	/// scatter uci_pred_urban id_urban, 	///
	///	msymbol(none) mlabel(est_pred_urban) mlabposition(12) mlabgap(1.3rs) mlabsize(3.4rs) mlabformat(%3.1f) mlabcolor(black) || ///
	/// scatter uci_pred_rural id_rural, 	///
	///	msymbol(none) mlabel(est_pred_rural) mlabposition(12) mlabgap(1.3rs) mlabsize(3.4rs) mlabformat(%3.1f) mlabcolor(black)  ///
	/// graph attributes
		xsize(15) ///
		ysize(5) ///
		xscale(r(0.5 10.5) line)  ///
		yscale(r(0 70) lcolor(black)  titlegap(0) line)  ///
		ylabel(0(10)70, angle(horizontal) labsize(`bar_text')) ///
		legend(order(2 "Urban" 4 "Rural") cols(1) size(`bar_text') ring(0) position(1) region(color(none) lwidth(none))) ///
		xtitle("") ///
		ytitle("Proportion achieving goal (%)", size(`bar_text') color(black) margin(medsmall)) ///
		xtick(1.5(1)10.5, tlength(0.2cm)) ///
		ytick(, tlength(0.2cm)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lwidth(none) lalign(center))  ///
		name(weq_bar_pred, replace) //	
		
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Figures"
	graph save "weq_bar_pred_${date}.gph", replace         
	graph export "weq_bar_pred_${date}.pdf", as(pdf) name(weq_bar_pred) replace		

frame change default
frame drop pred_forestplot

/*******************************************************************************
Appendix table - Proportion of diabetes population in rural vs. urban areas
*******************************************************************************/

* 1. Calculations --------------------------------------------------------------
	
* Proportion of diabetes population who are rural by country

	foreach n of numlist 1(1)`=scalar(n_countries)'  { // 
			
		* Generate sample
		gen sample = 1 if analytic_sample == 1 & rural != . & clin_dia == 1 & country_encoded == `n'
		
		* Output using proportions for countries with stratified samples
		if `n' != 12 & `n' != 31 { // country_encoded values for El Salvador and Romania
		
			* Set svyset
			svyset psu_num[pw = w_fbg], strata(stratum_num) singleunit(centered) // note weights are not rescaled

			svy, subpop(sample): proportion rural
			matlist r(table)
			matrix u_c_ru_clin_dia_rural_`n' = r(table)
		}

		* Output using proportions for countries without stratified samples
		else  { 
		
			* Set svyset
			svyset [pw = w_fbg], singleunit(centered) // note weights are not rescaled

			svy, subpop(sample): proportion rural
			matlist r(table)
			matrix u_c_ru_clin_dia_rural_`n' = r(table)
		}
		drop sample				
	}

	
* Pooled equal weights

	foreach outcome of varlist rural {
				
		* Generate sample
		gen sample = 1 if analytic_sample == 1 & `outcome' != . & clin_dia == 1

		* Rescaling weights
			* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			* gen wpopnr_sample=w_fbg*Population2015/sum_wpop_sample

			bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			gen weqnr_sample=w_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num[pw = weqnr_sample], strata(stratum_num) singleunit(centered)

		* Output using proportions
		svy, subpop(sample): proportion `outcome'
		matlist r(table)
		matrix u_weq_ov_clin_dia_`outcome' = r(table)
						
		
								
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample
		}
		
* 2. Putexcel output -----------------------------------------------------------

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
putexcel set "Putexcel tables/Appendix table - Proportion of diabetes population who are rural ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "Rural sample with diabetes, n"
putexcel C1 = "Urban sample with diabetes , n"
putexcel D1 = "Proportion of diabetes population who are rural, unweighted %"
putexcel E1 = "Proportion of diabetes population who are rural, weighted %"

* Macro for bottom row
	scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
	sort country_id country

* "Country"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		putexcel A`row' = country[`n']
	}

* "Rural sample, n"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		count if analytic_sample == 1 & rural == 1 & clin_dia == 1 & country_encoded == `n'
		local sample = r(N)
		putexcel B`row' = `sample'
	}
	putexcel B2:B`=scalar(bottom_row)',nformat(number_sep)

* "Urban sample, n"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		count if analytic_sample == 1 & rural == 0 & clin_dia == 1 & country_encoded == `n'
		local sample = r(N)
		putexcel C`row' = `sample'
	}
	putexcel C2:C`=scalar(bottom_row)',nformat(number_sep)	

* "Proportion of diabetes population who are rural, unweighted %"
	forval n = 1/`=scalar(n_countries)' {
	
		count if country_encoded == `n' & analytic_sample == 1 & clin_dia == 1 & rural ==1
		local numerator = r(N)
		di `numerator'
		
		count if country_encoded == `n' & analytic_sample == 1 & clin_dia == 1 & rural != .
		local denominator = r(N)
		di `denominator'
		
		local proportion = `numerator'/`denominator'*100
		di `proportion'
		
		local row = `n'+1	
		
		local b = string(`proportion',"%9.1f")
		di `b'
				
		putexcel D`row' = "`b'"
	}	
	
* "Proportion of diabetes population who are rural, weighted %"
	forval n = 1/`=scalar(n_countries)' {
	
		matlist u_c_ru_clin_dia_rural_`n'
		matrix results = u_c_ru_clin_dia_rural_`n'
			
		local row = `n'+1	
		
		local b = string(results[1,2]*100,"%9.1f")
		di `b'
				
		putexcel E`row' = "`b'"
	}


* Overall rows
		
	putexcel A`=scalar(bottom_row)' = "Overall*"
		
	* "Rural sample, n"
			count if analytic_sample == 1 & rural == 1 & clin_dia == 1
			local sample = r(N)
			putexcel B`=scalar(bottom_row)' = `sample'
	
	* "Urban  sample, n"
			count if analytic_sample == 1 & rural == 0 & clin_dia == 1
			local sample = r(N)
			putexcel C`=scalar(bottom_row)' = `sample'
	
	* "Proportion of diabetes population who are rural, unweighted %"
	
		count if analytic_sample == 1 & rural ==1 & clin_dia == 1
		local numerator = r(N)
		di `numerator'
		
		count if analytic_sample == 1 & rural != . & clin_dia == 1
		local denominator = r(N)
		di `denominator'
		
		local proportion = `numerator'/`denominator'*100
		di `proportion'
		
		local row = `n'+1	
		
		local b = string(`proportion',"%9.1f")
		di `b'
				
		putexcel D`=scalar(bottom_row)' = "`b'"
	
	* "Proportion of diabetes population who are rural, weighted %"
		matlist u_weq_ov_clin_dia_rural
		matrix results = u_weq_ov_clin_dia_rural
					
		local b = string(results[1,2]*100,"%9.1f")
		di `b'
			
		putexcel E`=scalar(bottom_row)' = "`b'"


////////////////////////////////////////////////////////////////////////////////
/////////////////////////// WITHIN-COUNTRY ANALYSIS ////////////////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
WITHIN-COUNTRY REGRESSIONS
*******************************************************************************/
	
* Adjusted rural/urban prevalence by country Poisson models

	foreach outcome in clin_dia $dependent_vars { // 

		foreach n of numlist 1(1)`=scalar(n_countries)'  {
			
			* Generate sample
			gen sample = 1 if analytic_sample == 1 & `outcome' != . & rural != . & country_encoded == `n'
		
			* Report country
			list country if country_encoded == `n' & country_id == 1
			di `n'
			
			* Number of individuals in the sample from each country with non-missing outcome
			count if (sample == 1 & country_encoded == `n')
			scalar count_outcome = r(N)
			di count_outcome
			
			* Number of distinct PSUs
			distinct psu if (sample == 1 & country_encoded == `n')
			scalar distinct = r(ndistinct) 
			di distinct
					
			* Testing whether there is sufficient variation in rural for models
			count if (sample == 1 & inlist(`outcome',1) & country_encoded == `n' & rural == 0)
			scalar count_rural_0 = r(N)
			di count_rural_0
			
			count if (sample == 1 & inlist(`outcome',1) & country_encoded == `n' & rural == 1)
			scalar count_rural_1 = r(N)
			di count_rural_1
			
			if 	(`=scalar(count_outcome)' != `=scalar(distinct)') & ///
				`=scalar(count_rural_0)' >= 1 & 					///
				`=scalar(count_rural_1)' >= 1  { // Making sure enough sample to run poisson robust modelsß
									
				* Make age spline
				mkspline age_spline = age if sample == 1, cubic nknots(3) displayknots	
					
				* If statement for svyset since some countries do not have strata, precluding standard errors
					if `n' != 13 & `n' != 33 { // country_encoded values for El Salvador and Romania
			
						* Set svyset
						svyset [pw = w_all], strata(stratum_num) singleunit(centered) // note weights are not rescaled
					}

					else  { 
					
						* Set svyset
						svyset [pw = w_all], singleunit(centered) // note weights are not rescaled
					}
				
				* Raw proportions
				svy, subpop(sample): proportion `outcome' if country_encoded == `n', over(rural)
				matlist r(table)
				matrix c_prop_`outcome'_`n' = r(table)
				
				* Regression
				svy, subpop(sample): poisson `outcome' i.rural c.age_spline* if country_encoded == `n', irr baselevels
				matlist r(table)
				matrix c_rr_`outcome'_`n' = r(table)
				
				* Predictive margins
				margins rural, vce(unconditional)
				matlist r(table)
				matrix c_pred_`outcome'_`n' = r(table)
												
				* dydx margins
				margins, dydx(rural) vce(unconditional)
				matlist r(table)
				matrix c_dydx_`outcome'_`n' = r(table)
				
				drop age_spline*
								
			}

			else {
				matrix c_rr_`outcome'_`n' = (.,. \ .,. \ .,. \ .,. \ .,.)
				matrix c_pred_`outcome'_`n' = (.,. \ .,. \ .,. \ .,. \ .,.)
				matrix c_dydx_`outcome'_`n' = (.,. \ .,. \ .,. \ .,. \ .,.)
			}		
		drop sample
		}
	}
		
/*******************************************************************************
FIGURE: WITHIN-COUNTRY REGRESSION FOREST PLOTS
*******************************************************************************/

	* 1. Generating the spreadsheet using putexcel ---------------------------------

		/*	This step creates an Excel file with the risk ratios and average marginal
			effects for the country-level regressions.
			effects for the country-level regressions.
		*/

	foreach outcome in $dependent_vars {  //							
		
		sort country_encoded

		* Create the Excel doc
		cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
		putexcel set "country covariate plot `outcome' ${date}.xlsx", replace 
			
		* Macro for bottom row
			scalar bottom_row = `=scalar(n_countries)'+2
			putexcel A`=scalar(bottom_row)' = "Pooled"
					   
		* Column: Country
		putexcel A1 = "Country"
		sort country_id country_encoded
		forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		putexcel A`row' = country[`n']
		}		   

		* Column: Income group
		putexcel B1 = "WHOregionclass"
		sort country_id country_encoded
		forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		putexcel B`row' = WHOregionclass[`n']
		}

		* Column: Region
		putexcel C1 = "countryGDPclass"
		sort country_id country_encoded
		forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		putexcel C`row' = countryGDPclass[`n']
		}

		* Column: Ccode
		putexcel D1 = "ccode"
		forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1
		sort country_id country_encoded
		putexcel D`row' = ccode[`n']
		}	

		* Column: gdp
		putexcel E1 = "gdp"
		forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1
		sort country_id country_encoded
		putexcel E`row' = gdp[`n']
		}
		
		* Column: percent of diabetes who are rural
		putexcel F1 = "rural_percent"
		forval n = 1/`=scalar(n_countries)' {
	
		matlist u_c_ru_clin_dia_rural_`n'
		matrix results = u_c_ru_clin_dia_rural_`n'
		local row = `n'+1	
		putexcel F`row' = results[1,2]*100		
		}
		
		* Column titles -> variable names
			
			putexcel G1 = "est_rr"
			putexcel H1 = "lci_rr"
			putexcel I1 = "uci_rr"
			putexcel J1 = "p_value"
				
			putexcel K1 = "est_dydx"
			putexcel L1 = "lci_dydx"
			putexcel M1 = "uci_dydx"
			
			putexcel N1 = "est_urban_pred"
			putexcel O1 = "lci_urban_pred"
			putexcel P1 = "uci_urban_pred"
			putexcel Q1 = "est_rural_pred"
			putexcel R1 = "lci_rural_pred"
			putexcel S1 = "uci_rural_pred"
									
			* rr
			forval n = 1/`=scalar(n_countries)' {
				
					local row = 1+`n'
					di `row'
					
					matlist c_rr_`outcome'_`n'
					matrix results = c_rr_`outcome'_`n'
					
					local b = results[1,2]
					local lci = results[5,2]
					local uci = results[6,2]
					local p = results[4,2]
					
					putexcel G`row' = `b'
					putexcel H`row' = `lci'
					putexcel I`row' = `uci'
					putexcel J`row' = `p'
					
					local row = `row'+1
					di `row'
				}
			
			* dydx
			forval n = 1/`=scalar(n_countries)' {
					
					local row = 1+`n'
					di `row'
					
					matlist c_dydx_`outcome'_`n'
					matrix results = c_dydx_`outcome'_`n'
					
					local b = results[1,2]*100
					local lci = results[5,2]*100
					local uci = results[6,2]*100
					
					putexcel K`row' = `b'
					putexcel L`row' = `lci'
					putexcel M`row' = `uci'
					
					local row = `row'+1
					di `row'
				}
				
			* Column: Country estimates by rural and urban
			forval n = 1/`=scalar(n_countries)' {
						
				matlist c_pred_`outcome'_`n'
				matrix results = c_pred_`outcome'_`n'
				
				local row = `n'+1


						putexcel N`row' = results[1,1]*100
						putexcel O`row' = results[5,1]*100
						putexcel P`row' = results[6,1]*100
						putexcel Q`row' = results[1,2]*100
						putexcel R`row' = results[5,2]*100
						putexcel S`row' = results[6,2]*100		
			}		
		}	

	* 2. Constructing the rr forest plot----------------------------------------
			
	frame create covariate_plot
	frame change covariate_plot
			
	foreach outcome in $dependent_vars {  //					
			
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
					
	import excel "country covariate plot `outcome' ${date}.xlsx", sheet("Sheet1") cellrange(A1:S43) firstrow clear

	* Drop if data not available
	drop if missing(lci_rr)
			
	* See help for what each of the "_USE" categories refers to	
	gen _USE = 1

	* Adding, sorting, and formatting the pooled summaries
	gsort - est_rr
			
	* Transforming for eform display in forestplot
	gen est_rr_exp = log(est_rr)
	gen lci_rr_exp = log(lci_rr)
	gen uci_rr_exp = log(uci_rr)

	* Cropping values from the predictions so not >100% or <0%
	replace lci_urban_pred = 0 if lci_urban_pred < 0
	replace lci_rural_pred = 0 if lci_rural_pred < 0
	replace uci_urban_pred = 100 if uci_urban_pred > 100
	replace uci_rural_pred = 100 if uci_rural_pred > 100
	
	* Making string variables	
	gen pvalue_s = string(p_value,"%4.3f")
	replace pvalue_s = "<0.001" if p_value <0.001
	replace pvalue_s = "" if p_value == .

	gen est_rr_s = string(est_rr,"%9.2f")
	gen lci_rr_s = string(lci_rr,"%9.2f")
	gen uci_rr_s = string(uci_rr,"%9.2f")
	gen output_rr_s = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
	replace output_rr_s = "" if est_rr == .

	gen est_dydx_s = string(est_dydx,"%9.1f")
	gen lci_dydx_s = string(lci_dydx,"%9.1f")
	gen uci_dydx_s = string(uci_dydx,"%9.1f")
	gen output_dydx_s = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
	replace output_dydx_s = "" if est_dydx == .	
	
	gen est_urban_pred_s = string(est_urban_pred,"%9.1f")
	gen lci_urban_pred_s = string(lci_urban_pred,"%9.1f")
	gen uci_urban_pred_s = string(uci_urban_pred,"%9.1f")
	gen output_urban_pred_s = est_urban_pred_s + " (" + lci_urban_pred_s + " to " + uci_urban_pred_s + ")"
	replace output_urban_pred_s = "" if est_urban_pred == .
	
	gen est_rural_pred_s = string(est_rural_pred,"%9.1f")
	gen lci_rural_pred_s = string(lci_rural_pred,"%9.1f")
	gen uci_rural_pred_s = string(uci_rural_pred,"%9.1f")
	gen output_rural_pred_s = est_rural_pred_s + " (" + lci_rural_pred_s + " to " + uci_rural_pred_s + ")"
	replace output_rural_pred_s = "" if est_rural_pred == .
	
	* Realiigning
	leftalign

	* Adding bolded labels		
	label var Country `"`"{bf:Country}"'"'
	label var output_rr_s `"`"{bf:Risk ratio}"'"'
	label var output_dydx_s `"`"{bf:Average marginal}"' `"{bf:effect (%)}"'"'
	label var pvalue_s `"`"{bf:P value}"'"'
	label var output_urban_pred_s `"`"{bf:Urban adjusted}"' `"{bf:proportion (%)}"'"'
	label var output_rural_pred_s `"`"{bf:Rural adjusted}"' `"{bf:proportion (%)}"'"'

	/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
		of variable helpful posts on Statalist:
		
		https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
	*/

	* recast str21 category, force
		
	local line_size ".2rs"	
		
	forestplot est_rr_exp lci_rr_exp uci_rr_exp, ///
		labels(Country) ///
		eform ///
		nostats /// this tells it not to calculate stats but rather use raw input data
		rcols(output_rr_s pvalue_s output_dydx_s output_urban_pred_s output_rural_pred_s) /// this tells forestplot which columsn to display
		range(0.01 10) /// range of the plot
		xlabel(0.01 "0.01" 0.1 "0.1" 1 "1" 10 "10", tlwidth(`line_size')) /// plot labels
		/// xmtick(0.25(0.25)4) ///
		astext(82) ///
		spacing(1.15) /// this refers to spacing by line
		/// style options
			diamopts(lwidth(`line_size')) ///
			boxopts(mcolor(none)) ///
			ciopts(lwidth(`line_size') msize(1rs)) ///
			nlineopts(lwidth(`line_size')) ///
			olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(.3rs)) ///
		plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
		graphregion(margin(l=0 r=0 b=0rs t=0))	///
		xsize(8.5) ///
		ysize(6) ///
		xtitle("Risk ratio", size(2rs)  margin(l=0 r=82 b=1 t=1)) ///
		name(forest_plots_`outcome',replace) //
		
	* Fixing some minor formatting issues	
	gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

		cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Figures"
		graph save "forest_plots_`outcome' ${date}.gph", replace         
		graph export "forest_plots_`outcome' ${date}.pdf", as(pdf) name(forest_plots_`outcome') replace
		
	}	
		
	frame change default
	frame drop covariate_plot

* 3. Constructing the abbeviated rr figures ------------------------------------
		
		frame create country_micro_plot
		frame change country_micro_plot
		
		foreach outcome in cc_dm_diag cc_gluc_low cc_bp_low cc_dm_control cc_hy_control dm_cc_double {  //
				
		cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
		import excel "country covariate plot `outcome' ${date}.xlsx", sheet("Sheet1") cellrange(A1:S43) firstrow clear
			
		* Dropping countries with missing values
		drop if missing(est_rr)
		
		* Sorting by est_rr
		gsort - est_rr
							
		* Encodes the country in sorted order
		gen id = _n // re-doing the id based on the obs added above		
		labmask id, values(ccode) /// This encodes the id variable using the Indicator labels
		
		* Creates a scalar with the number of countries to be used in graph
		scalar N = _N
			di N
		
		* Clone lci and uci 
			clonevar uci_rr_2 = uci_rr
			clonevar lci_rr_2 = lci_rr
			clonevar est_rr_2 = est_rr
		
		* Directional arrow plot from (y1, x1) to (y2, x2) using "twoway pcarrow y1 x1 y2 x2"
			* lower bounds
			local local_lower_bound = .01
			gen lci_lower_arrow = .
			gen uci_lower_arrow = .
			replace lci_lower_arrow = `local_lower_bound' if lci_rr < `local_lower_bound'
			replace uci_lower_arrow = uci_rr if lci_rr < `local_lower_bound'
			replace lci_rr_2 = . if lci_rr < `local_lower_bound'
			replace uci_rr_2 = . if lci_rr < `local_lower_bound'
		
			* upper bounds
			local local_upper_bound = 10
			gen lci_upper_arrow = .
			gen uci_upper_arrow = .
			replace lci_upper_arrow = lci_rr if 		uci_rr > `local_upper_bound'
			replace uci_upper_arrow = `local_upper_bound' if 		uci_rr > `local_upper_bound'
			replace lci_rr_2 = . if uci_rr > `local_upper_bound'
			replace uci_rr_2 = . if uci_rr > `local_upper_bound' 
			
			* est
			gen est_arrow = .
			replace est_arrow = est_rr
			replace est_arrow = . if est_rr < `local_lower_bound' | est_rr > `local_upper_bound' // in case upper bound too negative
			replace est_rr_2 = . if est_rr < `local_lower_bound' | est_rr > `local_upper_bound' // in case upper bound too negative 
			
			* preparing double sided arrow
			gen lci_double_arrow = .
			gen uci_double_arrow = .
			replace lci_double_arrow = `local_lower_bound' if lci_rr < `local_lower_bound' & uci_rr > `local_upper_bound'
			replace uci_double_arrow = `local_upper_bound' if lci_rr < `local_lower_bound' & uci_rr > `local_upper_bound'
			replace lci_lower_arrow = . if lci_rr < `local_lower_bound' & uci_rr > `local_upper_bound'
			replace uci_lower_arrow = . if lci_rr < `local_lower_bound' & uci_rr > `local_upper_bound' 
			replace lci_upper_arrow = . if lci_rr < `local_lower_bound' & uci_rr > `local_upper_bound'
			replace uci_upper_arrow = . if lci_rr < `local_lower_bound' & uci_rr > `local_upper_bound'
				
		* Generating local variable for y axis labels
		levelsof id if WHOregionclass==1, local(WHOregionclass1)
		levelsof id if WHOregionclass==2, local(WHOregionclass2)
		levelsof id if WHOregionclass==3, local(WHOregionclass3)
		levelsof id if WHOregionclass==4, local(WHOregionclass4)
		levelsof id if WHOregionclass==5, local(WHOregionclass5)
		levelsof id if WHOregionclass==6, local(WHOregionclass6)
	
		* Local variables to facilitate construction of a nice plot
		local fontsize "5rs"
		local marker_size "2rs"
		local line_width ".3rs"
		/*	Changing colors to lancet theme:
				https://rdrr.io/cran/ggsci/man/pal_lancet.html
				https://rdrr.io/github/kueckelj/confuns/src/R/color-palettes-and-spectra.R

			hex to RGB converer: https://www.rapidtables.com/convert/color/hex-to-rgb.html
			*/
			local lancet_blue "0 70 139" // #00468BFF
			local lancet_red "237 0 0" // #ED0000FF
			local lancet_green "66 181 64" // #42B540FF
			local lancet_teal "0 153 180" // #0099B4FF
			local lancet_purple "146 94 159" // #925E9FFF
			local lancet_peach "253 175 145" // #FDAF91FF
	
		twoway ///
			(rspike lci_rr_2 uci_rr_2 id, /// 
				lcolor(black)   ///
				lwidth(`line_width')	///
				horizontal ///
				ysc(reverse)) ///
			(pcarrow uci_lower_arrow id lci_lower_arrow id, /// pcarrow y1 x1 y2 x2 <- right to left arrow
				color(black)   ///
				lwidth(`line_width')	///
				horizontal ///
				ysc(reverse)) ///
			(pcarrow lci_upper_arrow id uci_upper_arrow id, /// pcarrow y1 x1 y2 x2 -> left to right arrow
				color(black)   ///
				lwidth(`line_width')	///
				horizontal ///
				ysc(reverse)) ///
			(pcbarrow lci_double_arrow id uci_double_arrow id, /// pbcarrow y1 x1 y2 x2 -> left to right arrow
				color(black)   ///
				lwidth(`line_width')	///
				horizontal ///
				ysc(reverse)) ///	
			/// point estimates by color
				(scatter id est_rr_2 if WHOregionclass == 1,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_blue'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 2,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_red'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 3,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_green'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 4,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_teal'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 5,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_purple'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 6,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_peach'") ///
					msize(`marker_size')) ///
		, plotregion(style(none)) ///
		xtitle("Risk ratio", margin(l=0 r=0 b=1rs t=1rs) size(`fontsize'))							///
		xscale(ra(`local_lower_bound' `local_upper_bound') log) ///
		xlabel(0.01 .1 1 10, labsize(`fontsize'))   ///
		xmtick(0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 2 3 4 5 6 7 8 9) ///
		ytitle("") ///
		yscale(r(1 `=scalar(N)') noline)	///
		ylabel(1(1)`=scalar(N)', valuelabel angle(0) noticks labsize(`fontsize')) ///
			ylab(`WHOregionclass1', add labcolor(`lancet_blue')) ///
			ylab(`WHOregionclass2', add custom labcolor(`lancet_red')) ///
			ylab(`WHOregionclass3', add custom labcolor(`lancet_green')) ///
			ylab(`WHOregionclass4', add custom labcolor(`lancet_teal')) /// 
			ylab(`WHOregionclass5', add custom labcolor(`lancet_purple')) /// 
			ylab(`WHOregionclass6', add custom labcolor(`lancet_peach')) /// 
		legend(off) ///
		/// legend(order(2 4 6) label(2 "LIC") label(4 "LMIC") ///
		///		label(6 "UMIC") ///
		///	size (`fontsize') cols(1) rowgap(1rs) ring(0) position(5) title("", size(`fontsize') placement(west))  ///
		///	region(fcolor(none) lcolor(none) margin(l=.4rs r=.4rs b=3rs t=0rs))) /// 
		xline(1, lcolor(black) lwidth(.2rs)) ///
		xsize(1.875)   ///
		ysize(4)   ///
		scale(1) ///
		graphregion(margin(l=0 r=4 b=0 t=0))  ///
		name(m_forest_`outcome', replace) //		
				
							
		cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
		graph save "Figures/m_forest_`outcome' $date.gph", replace
		graph export "Figures/m_forest_`outcome' $date.pdf", as(pdf) name(m_forest_`outcome') replace	
		}	

		
		twoway ///
			(rspike lci_rr_2 uci_rr_2 id, /// 
				lcolor(black)   ///
				lwidth(`line_width')	///
				horizontal ///
				ysc(reverse)) ///
			(pcarrow uci_lower_arrow id lci_lower_arrow id, /// pcarrow y1 x1 y2 x2 <- right to left arrow
				color(black)   ///
				lwidth(`line_width')	///
				horizontal ///
				ysc(reverse)) ///
			(pcarrow lci_upper_arrow id uci_upper_arrow id, /// pcarrow y1 x1 y2 x2 -> left to right arrow
				color(black)   ///
				lwidth(`line_width')	///
				horizontal ///
				ysc(reverse)) ///
			(pcbarrow lci_double_arrow id uci_double_arrow id, /// pbcarrow y1 x1 y2 x2 -> left to right arrow
				color(black)   ///
				lwidth(`line_width')	///
				horizontal ///
				ysc(reverse)) ///	
			/// point estimates by color
				(scatter id est_rr_2 if WHOregionclass == 1,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_blue'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 2,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_red'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 3,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_green'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 4,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_teal'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 5,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_purple'") ///
					msize(`marker_size')) ///
				(scatter id est_rr_2 if WHOregionclass == 6,  ///
					ysc(reverse)  ///
					msymbol(square) ///
					mcolor("`lancet_peach'") ///
					msize(`marker_size')) ///
		, plotregion(style(none)) ///
		xtitle("Risk ratio", margin(l=0 r=0 b=1rs t=1rs) size(`fontsize'))							///
		xscale(ra(`local_lower_bound' `local_upper_bound') log) ///
		xlabel(0.01 .1 1 10, labsize(`fontsize'))   ///
		xmtick(0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 2 3 4 5 6 7 8 9) ///
		ytitle("") ///
		yscale(r(1 `=scalar(N)') noline)	///
		ylabel(1(1)`=scalar(N)', valuelabel angle(0) noticks labsize(`fontsize')) ///
			ylab(`WHOregionclass1', add labcolor(`lancet_blue')) ///
			ylab(`WHOregionclass2', add custom labcolor(`lancet_red')) ///
			ylab(`WHOregionclass3', add custom labcolor(`lancet_green')) ///
			ylab(`WHOregionclass4', add custom labcolor(`lancet_teal')) /// 
			ylab(`WHOregionclass5', add custom labcolor(`lancet_purple')) /// 
			ylab(`WHOregionclass6', add custom labcolor(`lancet_peach')) /// 
		/// legend(off) ///
		legend(order(5 6 7 8 9 10) ///
				label(5 "Africa") ///
				label(6 "Americas") ///
				label(7 "Eastern" "Mediterranean") ///
				label(8 "Europe") ///
				label(9 "South East" "Asia") ///
				label(10 "Western" "Pacific") ///
			size (`fontsize') cols(1) rowgap(2rs) ring(1) position(2) title("", size(`fontsize') placement(west))  ///
			region(fcolor(none) lcolor(none) margin(l=.4rs r=.4rs b=3rs t=0rs))) /// 
		xline(1, lcolor(black) lwidth(.2rs)) ///
		xsize(1.875)   ///
		ysize(4)   ///
		scale(1) ///
		graphregion(margin(l=0 r=4 b=0 t=0))  ///
		name(m_forest_plots_legend, replace) //		
		
	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
		graph save "Figures/m_forest_plots_legend $date.gph", replace
		graph export "Figures/m_forest_plots_legend $date.pdf", as(pdf) name(m_forest_plots_legend) replace	
	

	frame change default
	frame drop country_micro_plot
	
////////////////////////////////////////////////////////////////////////////////
////////////////////// TABLE 1: SURVEY CHARACTERISTICS /////////////////////////
////////////////////////////////////////////////////////////////////////////////	

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
putexcel set "Putexcel tables/Table survey characteristics ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "ISO code"
putexcel C1 = "Income group"
putexcel D1 = "Year"
putexcel E1 = "Response rate"
putexcel F1 = "Sample size"
putexcel G1 = "Median age (range), years"
putexcel H1 = "Proportion female, %"
putexcel I1 = "Proportion rural, %"
putexcel J1 = "Region"
putexcel K1 = "Median age, years" // range not included

* Macro for bottom row
scalar bottom_row = `=scalar(n_countries)'+2	

* Macro for bottom row minus 1
scalar bottom_row_minus_1 = `=scalar(n_countries)'+1

* Sorting the dataset
sort country_id country 

* Column: Country --------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel A`row' = country[`n']
}

* Column: ISO code -------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel B`row' = ccode[`n']
}

* Column: Income group ---------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel C`row' = countryGDPclass_string[`n']
}

* Column: Year -----------------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel D`row' = survey_year[`n']
}

* Column: Response rate --------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel E`row' = response_rate[`n']
}
putexcel E2:E`=scalar(bottom_row)',nformat(0)

* Column: Sample size ----------------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
count if inlist(analytic_sample,1) & country_encoded == `n'
local sample = r(N)
putexcel F`row' = `sample'
}
putexcel F2:F`=scalar(bottom_row)',nformat(number_sep)

* Column: Median age (range) -----------------------------------------------------
forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	sum age if analytic_sample == 1 & country_encoded == `n', detail
	
	local min1 = r(min)
	di `min1'
	local min2 = string(`min1',"%9.0f")
	di `min2'
	
	local med1 = r(p50)
	di `med1'
	local med2 = string(`med1',"%9.0f")
	di `med2'
	
	local max1 = r(max)
	di `max1'
	local max2 = string(`max1',"%9.0f")
	di `max2'
	
	putexcel G`row' = "`med2' (`min2'-`max2')"
}

	* Column: Median age (range not included ) -----------------------------------------------------
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		sum age if analytic_sample == 1 & country_encoded == `n', detail
		
		local min1 = r(min)
		di `min1'
		local min2 = string(`min1',"%9.0f")
		di `min2'
		
		local med1 = r(p50)
		di `med1'
		local med2 = string(`med1',"%9.0f")
		di `med2'
		
		local max1 = r(max)
		di `max1'
		local max2 = string(`max1',"%9.0f")
		di `max2'
		
		putexcel K`row' = `med1'
	}

* Column: Rural ---------------------------------------------------------------
gen rural_percent = .
			
	* Generate sample
	gen sample = 1 if analytic_sample == 1 
	
	* Set svyset
	svyset psu_num[pw = w_fbg], strata(stratum_num) singleunit(centered) // note weights are not rescaled

	* Output using proportions
	svy, subpop(sample): proportion rural, over(country_encoded)
	matlist r(table)
	matrix rural_country = r(table)
	
	drop sample
	
forval n = 1/`=scalar(n_countries)' { //		
	local row = `n'+1
	local country_matrix_column = `=scalar(n_countries)' +`n'
	local loc_rural = rural_country[1,`country_matrix_column']*100
	di `loc_rural'
	replace rural_percent = `loc_rural' if country_encoded == `n'
	local loc_rural_string = string(`loc_rural',"%9.1f")
	dis `loc_rural_string'
	putexcel H`row' = `loc_rural_string'
}
putexcel H2:H`=scalar(bottom_row)',nformat(0)

* Column: Female ---------------------------------------------------------------
gen female_percent = .
			
	* Generate sample
	gen sample = 1 if analytic_sample == 1 
	
	* Set svyset
	svyset psu_num[pw = w_fbg], strata(stratum_num) singleunit(centered) // note weights are not rescaled

	* Output using proportions
	svy, subpop(sample): proportion sex, over(country_encoded)
	matlist r(table)
	matrix female_country = r(table)
	
	drop sample
	
forval n = 1/`=scalar(n_countries)' { //		
	local row = `n'+1
	local country_matrix_column = `=scalar(n_countries)' +`n'
	local loc_female = female_country[1,`country_matrix_column']*100
	di `loc_female'
	replace female_percent = `loc_female' if country_encoded == `n'
	local loc_female_string = string(`loc_female',"%9.1f")
	dis `loc_female_string'
	putexcel I`row' = `loc_female_string'
}
putexcel I2:I`=scalar(bottom_row)',nformat(0)

* Column: Region ---------------------------------------------------------------
sort country_id country_encoded
forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel J`row' = WHOregionclass_string[`n']
	}

* Summary row at bottom --------------------------------------------------------
	
* Overall total label
	putexcel A`=scalar(bottom_row)' = "Overall total"
	
* Sample all
	tab analytic_sample, matcell(sample)
	matlist sample
	local sample = sample[1,2]
	dis `sample'
	putexcel F`=scalar(bottom_row)' = `sample',nformat(number_sep)
	
* Median (IQR) % rural
frame create rural_median
frame change rural_median
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
import excel "Putexcel tables/Table survey characteristics ${date}.xlsx", sheet("Sheet1") cellrange(A1:K`=scalar(bottom_row_minus_1)') firstrow clear

* Responserate
	sum Responserate, d
		local min1 = r(p25)
		di `min1'
		local min2 = string(`min1',"%9.0f")
		di `min2'
		
		local med1 = r(p50)
		di `med1'
		local med2 = string(`med1',"%9.0f")
		di `med2'
		
		local max1 = r(p75)
		di `max1'
		local max2 = string(`max1',"%9.0f")
		di `max2'
	putexcel E`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"

* Medianage
	sum Medianageyears, d
		local min1 = r(p25)
		di `min1'
		local min2 = string(`min1',"%9.0f")
		di `min2'
		
		local med1 = r(p50)
		di `med1'
		local med2 = string(`med1',"%9.0f")
		di `med2'
		
		local max1 = r(p75)
		di `max1'
		local max2 = string(`max1',"%9.0f")
		di `max2'
	putexcel G`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"		
	
* Proportionfemale
	sum Proportionfemale, d
		local min1 = r(p25)
		di `min1'
		local min2 = string(`min1',"%9.0f")
		di `min2'
		
		local med1 = r(p50)
		di `med1'
		local med2 = string(`med1',"%9.0f")
		di `med2'
		
		local max1 = r(p75)
		di `max1'
		local max2 = string(`max1',"%9.0f")
		di `max2'
	putexcel H`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"	
	
* Proportionrural
	sum Proportionrural, d
		local min1 = r(p25)
		di `min1'
		local min2 = string(`min1',"%9.0f")
		di `min2'
		
		local med1 = r(p50)
		di `med1'
		local med2 = string(`med1',"%9.0f")
		di `med2'
		
		local max1 = r(p75)
		di `max1'
		local max2 = string(`max1',"%9.0f")
		di `max2'
	putexcel I`=scalar(bottom_row)' = "`med2' (`min2'-`max2')"
			
frame change default
frame drop rural_median	
	
* Variables that can be dropped now
drop rural_percent 
drop female_percent

////////////////////////////////////////////////////////////////////////////////
///////////////////// FIGURES: MAP OF INCLUDED COUNTRIES ///////////////////////
////////////////////////////////////////////////////////////////////////////////	

* 0. Defining the availability of data -----------------------------------------

egen n_clin_dia = count(clin_dia), by(country)
list country n_clin_dia if country_id == 1

gen n_variables = 0
	replace n_variables = 1 if n_clin_dia >= 1
	
list country n_variables if country_id == 1

* 1. Generating the spreadsheet using putexcel ---------------------------------

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the aggregate weq regression output.
	*/

sort country_encoded

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
putexcel set "map data ${date}.xlsx", replace 

* Column: Country
putexcel A1 = "Country"
sort country_id country_encoded
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel A`row' = country[`n']
}		   

* Column: Income group
putexcel B1 = "WHOregionclass"
sort country_id country_encoded
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel B`row' = WHOregionclass[`n']
}

* Column: Region
putexcel C1 = "countryGDPclass"
sort country_id country_encoded
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
putexcel C`row' = countryGDPclass[`n']
}

* Column: Ccode
putexcel D1 = "ccode"
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1
sort country_id country_encoded
putexcel D`row' = ccode[`n']
}	

* Column: iso3n
putexcel E1 = "iso3n"
forval n = 1/`=scalar(n_countries)' {
local row = `n'+1
sort country_id country_encoded
putexcel E`row' = iso3n[`n']
}

* Column: n_variables
putexcel F1 = "n_variables"

forval n = 1/`=scalar(n_countries)' {
local row = `n'+1	
sort country_id country_encoded
putexcel F`row' = n_variables[`n']
}

* Making a new frame
frame create map
frame change map

* 2. Making country categories

/* This code imports World Bank data, then constructs a dataset that indicates in
 a binaryt variably "hic" if a country is a HIC and thus should be shaded differently in the map. */

import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Maps/World Bank Income levels.xls", sheet("Country Analytical History") cellrange(A5:IV229) firstrow clear
rename A ccode
rename Banksfiscalyear country
drop if country == "Data for calendar year :"
drop if country == "Low income (L)"
drop if country == "Lower middle income (LM)"
drop if country == "Upper middle income (UM)"
drop if country == "High income (H)"
drop if country == ""

keep ccode country FY10 FY11 FY12 FY13 FY14 FY15 FY16 FY17 FY18 FY19 FY20 FY21

* Encoding each year based on income group
foreach var of varlist FY* {
	generate `var'_encoded = .
	replace `var'_encoded = 1 if `var' == "H"
	replace `var'_encoded = 2 if `var' == "UM"
	replace `var'_encoded = 3 if `var' == "LM"
	replace `var'_encoded = 4 if `var' == "L"
	drop `var'
	tab `var'_encoded
	}

* Generating a single variable if a country is only a HIC or blank
gen hic = 0
replace hic = 1 if ///
	inlist(FY10_encoded,.,1) & ///
	inlist(FY11_encoded,.,1) & ///
	inlist(FY12_encoded,.,1) & ///
	inlist(FY13_encoded,.,1) & ///
	inlist(FY14_encoded,.,1) & ///
	inlist(FY15_encoded,.,1) & ///
	inlist(FY16_encoded,.,1) & ///
	inlist(FY17_encoded,.,1) & ///
	inlist(FY18_encoded,.,1) & ///
	inlist(FY19_encoded,.,1) & ///
	inlist(FY20_encoded,.,1) & ///
	inlist(FY21_encoded,.,1)  //
	
* Generating iso3n for merging later
kountry ccode, from(iso3c) to(iso3n)
rename _ISO3N_ iso3n
	
* Correcting a few country codes that are outdated in Kountry:
replace iso3n = 643 if country == "Russian Federation"
replace iso3n = 729 if country == "Sudan"
replace iso3n = 688 if country == "Serbia"

sort iso3n
list country ccode iso3n hic

drop if iso3n == .

keep iso3n hic

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Maps/high income data ${date}.dta", replace

* 3. Constructs a shape file from IPUMS for the World Map we will use

	* https://international.ipums.org/international/gis.shtml

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Maps"

/* Excellent resources for making these maps
1. https://blog.stata.com/2020/04/07/how-to-create-choropleth-maps-using-the-covid-19-data-from-johns-hopkins-university/
2. https://www.youtube.com/watch?v=PG4s5acfJ5Y
3. https://www.statalist.org/forums/forum/general-stata-discussion/general/1447786-spmap-create-rectangles-to-show-labels-and-colors-for-small-jurisdictions-in-choropleth-maps
4. 
*/

* Creating dta file from IPUMS map and making numeric country code equivalent

/* slow step and not needed each time
spshape2dta world_countries_2020, saving(countries) replace
use countries.dta, replace
rename CNTRY_CODE iso3n  // making the numeric country code match
destring iso3n, replace
save countries.dta, replace
*/

* 4. Preparing the data for the map

* Importing and converting the HPACC output data for use in merge below
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables/map data ${date}.xlsx", sheet("Sheet1") cellrange(A1:F43) firstrow clear
save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Maps/map data ${date}.dta", replace
 
* Merging the country shape data from IPUMS into HPACC output data 
merge 1:m iso3n using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Maps/countries.dta"

tab _merge
sort _merge 
list Country _merge iso3n

drop if iso3n == 836 // Zanzibar will not be included in this map as it is not a country technically

drop _merge

* Merging the World Bank category data to shade HICs white (need to use m:1 because of multiple Palestine rows here)
merge m:1 iso3n using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Maps/high income data ${date}.dta"
drop _merge

* Preparing the data for the map
replace n_variables = 0 if n_variables == .

* Prepating the chloropleth categories for the map below
gen cl_cat = .
replace cl_cat = 2
replace cl_cat = 3 if hic == 1
replace cl_cat = 1 if inlist(n_variables,1,2,3)


* 5. Making the world map ------------------------------------------------------	

	/* Demo playground for grmap that loads MUCH faster
	
frame create map
frame change map

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Maps/Help maps"
	use "italy-regionsdata.dta", clear
	gen cl_cat = runiformint(1,5)

	label variable cl_cat "Country category"
	label define cl_cat_label 1 "Not an LMIC" 2 "No data" 3 "Data on 1 condition" 4 "Data on 2 conditions" 5 "Data on 3 conditions",  modify
	label values cl_cat cl_cat_label

	gen region_bold = "{bf:" + region + "}"
	
	grmap cl_cat, ///
		clmethod(unique) ///
		fcolor(white black*0.08 green*0.2 green*0.4 green*0.6) ///
		plotregion(icolor(eltblue*0.25)) graphregion(icolor(eltblue*0.25)) ///
		osize(0.05rs 0.05rs 0.05rs 0.05rs 0.05rs)  ///
		ocolor(black*0.5 black*0.5 black*0.5 black*0.5 black*0.5)  ///
		ndocolor(dkorange) ///
		legend(label(1 "Not an LMIC") label(2 "No data") label(3 "Data on" "1 condition") label (4 "Data on" "2 conditions") label(5 "Data on" "3 conditions") cols(1) position(8) ///
			bmargin(l=10 r=0 b=20 t=0) colgap(1rs) keygap(1rs) size(2.5rs) ///
			region(fcolor(white%80) lwidth(.1rs) lcolor(black) margin(l=2 r=2 b=2 t=2)) ///
			rowgap(2rs) ///
		margin(l=0 r=0 b=0 t=0)) ///
		label(label(region) xcoord(_CX)  ycoord(_CY) size(1.8rs)) // pos(12 0) ) //
		
	*/

* Labels the chloropleth variable for the legend in grmap
label variable cl_cat "Country category"
label define cl_cat_label 1 "No data" 2 "Not an LMIC" 3 "Included",  modify
label values cl_cat cl_cat_label

* Renames coordinate variables
gen ccode_included = ccode if inlist(cl_cat,2,3,4)

* Option for bolding value label
gen ccode_included_bold = "{bf:" + ccode_included + "}"

cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Maps"
grmap cl_cat, ///
	clmethod(unique) ///
	fcolor(green*0.4 white black*0.08 ) ///
	plotregion(icolor(eltblue*0.25)) graphregion(icolor(eltblue*0.25)) ///
	osize(.03rs .03rs .03rs)  ///
	ocolor(black black black black black)  ///
	legend(label(2 "Included country") label(3 "No data") label(4 "Not a low- or" "middle-income" "country") ///
		cols(1)  ///
		position(8) ///
		bmargin(l=25 r=0 b=25 t=0) colgap(0.8rs) keygap(1rs) size(2.5rs) ///
		region(fcolor(white%0) lwidth(.0rs) lcolor(black) margin(l=2 r=2 b=2 t=2)) ///
		rowgap(1rs) ///
		margin(l=0 r=0 b=0 t=0) ///
		symplacement(north)) /// 
	/// label(label(none) xcoord(_CX)  ycoord(_CY) size(1.8rs) color(black)) /// pos(12 0) ) ///
	name(rural_map, replace)

	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
	graph save "Figures/rural map $date.gph", replace
		graph export "Figures/rural map $date.pdf", as(pdf) name(rural_map) replace

frame change default
frame drop map	

/*******************************************************************************
Appendix table - Rural sample
*******************************************************************************/

* 1. Calculations --------------------------------------------------------------
	
* Unadjusted rural/urban prevalence by country

	foreach n of numlist 1(1)`=scalar(n_countries)'  { // 
			
		* Generate sample
		gen sample = 1 if analytic_sample == 1 & rural != . & country_encoded == `n'
		
		* Output using proportions for countries with stratified samples
		if `n' != 12 & `n' != 31 { // country_encoded values for El Salvador and Romania
		
			* Set svyset
			svyset psu_num[pw = w_fbg], strata(stratum_num) singleunit(centered) // note weights are not rescaled

			svy, subpop(sample): proportion rural
			matlist r(table)
			matrix u_c_ru_prev_rural_`n' = r(table)
		}

		* Output using proportions for countries without stratified samples
		else  { 
		
			* Set svyset
			svyset [pw = w_fbg], singleunit(centered) // note weights are not rescaled

			svy, subpop(sample): proportion rural
			matlist r(table)
			matrix u_c_ru_prev_rural_`n' = r(table)
		}
		drop sample				
	}

	
* Pooled equal weights

	foreach outcome of varlist rural {
				
		* Generate sample
		gen sample = 1 if analytic_sample == 1 & `outcome' != .

		* Rescaling weights
			* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			* gen wpopnr_sample=w_fbg*Population2015/sum_wpop_sample

			bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			gen weqnr_sample=w_fbg/sum_weq_sample
		
		* Set svyset
		svyset psu_num[pw = weqnr_sample], strata(stratum_num) singleunit(centered)

		* Output using proportions
		svy, subpop(sample): proportion `outcome'
		matlist r(table)
		matrix u_weq_ov_prev_`outcome' = r(table)
						
		
								
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample
		}
		
* 2. Putexcel output -----------------------------------------------------------

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
putexcel set "Putexcel tables/Appendix table - Prevalence of rural ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "Rural sample, n"
putexcel C1 = "Urban sample, n"
putexcel D1 = "Prevalence of rural, unweighted %"
putexcel E1 = "Prevalence of rural, weighted %"

* Macro for bottom row
	scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
	sort country_id country

* "Country"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		putexcel A`row' = country[`n']
	}

* "Rural sample, n"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		count if analytic_sample == 1 & rural == 1 & country_encoded == `n'
		local sample = r(N)
		putexcel B`row' = `sample'
	}
	putexcel B2:B`=scalar(bottom_row)',nformat(number_sep)

* "Urban sample, n"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		count if analytic_sample == 1 & rural == 0 & country_encoded == `n'
		local sample = r(N)
		putexcel C`row' = `sample'
	}
	putexcel C2:C`=scalar(bottom_row)',nformat(number_sep)	

* "Prevalence of rural, unweighted %"
	forval n = 1/`=scalar(n_countries)' {
	
		count if country_encoded == `n' & analytic_sample == 1 & rural ==1
		local numerator = r(N)
		di `numerator'
		
		count if country_encoded == `n' & analytic_sample == 1 & rural != .
		local denominator = r(N)
		di `denominator'
		
		local proportion = `numerator'/`denominator'*100
		di `proportion'
		
		local row = `n'+1	
		
		local b = string(`proportion',"%9.1f")
		di `b'
				
		putexcel D`row' = "`b'"
	}	
	
* "Prevalence of rural, weighted %"
	forval n = 1/`=scalar(n_countries)' {
	
		matlist u_c_ru_prev_rural_`n'
		matrix results = u_c_ru_prev_rural_`n'
			
		local row = `n'+1	
		
		local b = string(results[1,2]*100,"%9.1f")
		di `b'
				
		putexcel E`row' = "`b'"
	}


* Overall rows
		
	putexcel A`=scalar(bottom_row)' = "Overall*"
		
	* "Rural sample, n"
			count if analytic_sample == 1 & rural == 1
			local sample = r(N)
			putexcel B`=scalar(bottom_row)' = `sample'
	
	* "Urban  sample, n"
			count if analytic_sample == 1 & rural == 0
			local sample = r(N)
			putexcel C`=scalar(bottom_row)' = `sample'
	
	* "Prevalence of rural, unweighted %"
	
		count if analytic_sample == 1 & rural ==1
		local numerator = r(N)
		di `numerator'
		
		count if analytic_sample == 1 & rural != .
		local denominator = r(N)
		di `denominator'
		
		local proportion = `numerator'/`denominator'*100
		di `proportion'
		
		local row = `n'+1	
		
		local b = string(`proportion',"%9.1f")
		di `b'
				
		putexcel D`=scalar(bottom_row)' = "`b'"
	
	* "Prevalence of rural, weighted %"
		matlist u_weq_ov_prev_rural
		matrix results = u_weq_ov_prev_rural
					
		local b = string(results[1,2]*100,"%9.1f")
		di `b'
			
		putexcel E`=scalar(bottom_row)' = "`b'"

/*******************************************************************************
Appendix table - Sample characteristics by covariates
*******************************************************************************/

gen clin_dia_hbg = 1 if clin_dia == 1 & hbg_new == 1

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/rural-urban paper/"
putexcel set "Putexcel tables/Appendix table sample characteristics ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Characteristic"
putexcel A2 = "Age"
putexcel A3	= "<30 years"
putexcel A4	= "30-39 years"
putexcel A5	= "40-49 years"
putexcel A6	= "50-59 years"
putexcel A7	= "60-69 years"
putexcel A8 = "Sex"
putexcel A9 = "Male"
putexcel A10 = "Female"
putexcel A11 = "Education"
putexcel A12 = "No schooling"
putexcel A13 = "Primary education"
putexcel A14 = "Secondary or above"
putexcel A15 = "Rural vs. urban residence"
putexcel A16 = "Urban"
putexcel A17 = "Rural"
putexcel A18 = "Overall"

putexcel B1 = "Total sample"
putexcel D1 = "Diabetes sample"
putexcel F1 = "Diagnosed diabetes sample"

putexcel B2 = "n"
putexcel D2 = "n"
putexcel F2 = "n"

putexcel C2 = "Weighted %"
putexcel E2 = "Weighted %"
putexcel G2 = "Weighted %"

putexcel C18 = 100
putexcel E18 = 100
putexcel G18 = 100


* Macro for bottom row
scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
sort country_id country 

* n  ---------------------------------------------------------------------------

	* age_cat

		tab age_cat if analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B3 = matrix_sample_table[1,1]
		putexcel B4 = matrix_sample_table[2,1]
		putexcel B5 = matrix_sample_table[3,1]
		putexcel B6 = matrix_sample_table[4,1]
		putexcel B7 = matrix_sample_table[5,1]
		
		tab age_cat if analytic_sample == 1 & clin_dia == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D3 = matrix_sample_table[1,1]
		putexcel D4 = matrix_sample_table[2,1]
		putexcel D5 = matrix_sample_table[3,1]
		putexcel D6 = matrix_sample_table[4,1]
		putexcel D7 = matrix_sample_table[5,1]
		
		tab age_cat if analytic_sample == 1 & clin_dia_hbg == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F3 = matrix_sample_table[1,1]
		putexcel F4 = matrix_sample_table[2,1]
		putexcel F5 = matrix_sample_table[3,1]
		putexcel F6 = matrix_sample_table[4,1]
		putexcel F7 = matrix_sample_table[5,1]
	
	* sex

		tab sex if analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B9 = matrix_sample_table[1,1]
		putexcel B10 = matrix_sample_table[2,1]
				
		tab sex if clin_dia == 1 & analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D9 = matrix_sample_table[1,1]
		putexcel D10 = matrix_sample_table[2,1]
				
		tab sex if clin_dia_hbg == 1 & analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F9 = matrix_sample_table[1,1]
		putexcel F10 = matrix_sample_table[2,1]
				
	* educat3

		tab educat3 if analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B12 = matrix_sample_table[1,1]
		putexcel B13 = matrix_sample_table[2,1]
		putexcel B14 = matrix_sample_table[3,1]
		
		tab educat3 if clin_dia == 1 & analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D12 = matrix_sample_table[1,1]
		putexcel D13 = matrix_sample_table[2,1]
		putexcel D14 = matrix_sample_table[3,1]
		
		tab educat3 if clin_dia_hbg == 1 & analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F12 = matrix_sample_table[1,1]
		putexcel F13 = matrix_sample_table[2,1]
		putexcel F14 = matrix_sample_table[3,1]
	
	* rural

		tab rural if analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B16 = matrix_sample_table[1,1]
		putexcel B17 = matrix_sample_table[2,1]
				
		tab rural if clin_dia == 1 & analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D16 = matrix_sample_table[1,1]
		putexcel D17 = matrix_sample_table[2,1]
				
		tab rural if  clin_dia_hbg == 1 & analytic_sample == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F16 = matrix_sample_table[1,1]
		putexcel F17 = matrix_sample_table[2,1]
		
	* total
	
		tab analytic_sample, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel B18 = matrix_sample_table[2,1]
		
		tab analytic_sample if clin_dia == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel D18 = matrix_sample_table[2,1]
		
		tab analytic_sample if clin_dia_hbg == 1, matcell(matrix_sample_table)
		matlist matrix_sample_table
		putexcel F18 = matrix_sample_table[2,1]
		
	* Formatting	
	putexcel B2:B18,nformat(number_sep)
	putexcel D2:D18,nformat(number_sep)
	putexcel F2:F18,nformat(number_sep)
	
* Weighted for total and clin_dia ---------------------------------------------

	* age_cat
	
		* analytic_sample
		foreach var of varlist analytic_sample  {

			* Sample weights
			gen sample = 1 if `var' == 1 
		
		* Rescaling weights
			* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

			bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion age_cat
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel C3 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel C4 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel C5 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,4]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,4]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,4]*100,"%9.1f")
			putexcel C6 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,5]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,5]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,5]*100,"%9.1f")
			putexcel C7 = "`b' (`lci'-`uci')"
			
		* clin_dia
		foreach var of varlist clin_dia  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 & analytic_sample == 1
		
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion age_cat
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel E3 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel E4 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel E5 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,4]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,4]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,4]*100,"%9.1f")
			putexcel E6 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,4]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,4]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,4]*100,"%9.1f")
			putexcel E7 = "`b' (`lci'-`uci')"
			
		* clin_dia_hbg
		foreach var of varlist clin_dia_hbg  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 & analytic_sample == 1
						
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion age_cat
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel G3 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel G4 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel G5 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,4]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,4]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,4]*100,"%9.1f")
			putexcel G6 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,5]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,5]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,5]*100,"%9.1f")
			putexcel G7 = "`b' (`lci'-`uci')"

	* sex
		
		* analytic_sample
		foreach var of varlist analytic_sample  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion sex
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel C9 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel C10 = "`b' (`lci'-`uci')"
				
		* clin_dia
		foreach var of varlist clin_dia  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 & analytic_sample == 1
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion sex
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel E9 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel E10 = "`b' (`lci'-`uci')"
					
		* clin_dia_hbg
		foreach var of varlist clin_dia_hbg  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 & analytic_sample == 1
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion sex
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel G9 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel G10 = "`b' (`lci'-`uci')"
				
	* educat3

		* analytic_sample
		foreach var of varlist analytic_sample  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion educat3
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel C12 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel C13 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel C14 = "`b' (`lci'-`uci')"
			
		* clin_dia
		foreach var of varlist clin_dia  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 & analytic_sample == 1
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion educat3
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel E12 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel E13 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel E14 = "`b' (`lci'-`uci')"
			
		* clin_dia_hbg
		foreach var of varlist clin_dia_hbg  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 & analytic_sample == 1
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion educat3
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel G12 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel G13 = "`b' (`lci'-`uci')"
			
			local b = string(matrix_sample_table[1,3]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,3]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,3]*100,"%9.1f")
			putexcel G14 = "`b' (`lci'-`uci')"

	* rural

		* analytic_sample
		foreach var of varlist analytic_sample  {

			* Sample weights
			gen sample = 1 if `var' == 1 
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample			
				
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion rural
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel C16 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel C17 = "`b' (`lci'-`uci')"
				
		* clin_dia
		foreach var of varlist clin_dia  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 & analytic_sample == 1
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion rural
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel E16 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel E17 = "`b' (`lci'-`uci')"
					
		foreach var of varlist clin_dia_hbg  {
			
			* Sample weights
			gen sample = 1 if `var' == 1 & analytic_sample == 1
			
			* Rescaling weights
				* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
				* gen weqnr_sample=w_fbg*Population2015/sum_wpop_sample

				bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
				gen weqnr_sample=w_fbg/sum_weq_sample
			
			* Set svyset
			svyset psu_num [pw = weqnr_sample], strata(stratum_num) singleunit(centered)
				
			* Estimates by country
			svy, subpop(sample): proportion rural
			matrix matrix_sample_table = r(table)
					
			* Drop variables for loop
			drop weqnr_sample sum_weq_sample sample
		}	
		
			matlist matrix_sample_table
					
			local b = string(matrix_sample_table[1,1]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,1]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,1]*100,"%9.1f")
			putexcel G16 = "`b' (`lci'-`uci')"
		
			local b = string(matrix_sample_table[1,2]*100,"%9.1f")
			local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
			local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
			putexcel G17 = "`b' (`lci'-`uci')"
		
	* Formatting	
	putexcel B2:B18,nformat(number_sep)
	putexcel D2:D18,nformat(number_sep)
	putexcel F2:F18,nformat(number_sep)

/*******************************************************************************
Appendix table - Additional details on study sample and prevalence by country
*******************************************************************************/

foreach var of varlist  clin_dia {

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
putexcel set "Putexcel tables/Appendix table - Prevalence of `var' ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "Sample with condition, n"
putexcel C1 = "Prevalence of condition among rural population, weighted %"
putexcel D1 = "Prevalence of condition among urban population, weighted %"

* Macro for bottom row
	scalar bottom_row = `=scalar(n_countries)'+2	

* Sorting the dataset
	sort country_id country

* "Country"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		putexcel A`row' = country[`n']
	}

* "Sample with hypertension, n"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		count if analytic_sample == 1 & `var' == 1 & country_encoded == `n'
		local sample = r(N)
		
		if `sample'>0 {
			putexcel B`row' = `sample'
		}
		
		else {
			putexcel B`row' = "N/A"
		}
	}
	putexcel B2:B`=scalar(bottom_row)',nformat(number_sep)

* "Prevalence of condition among rural population, weighted %"
	forval n = 1/`=scalar(n_countries)' {
	
		matlist c_prop_`var'_`n'
		matrix results = c_prop_`var'_`n'
			
		local row = `n'+1	
		
		local b = string(results[1,4]*100,"%9.1f")
		di `b'
		local lci = string(results[5,4]*100,"%9.1f")
		local uci = string(results[6,4]*100,"%9.1f")
			
		if results[1,4] == . { //`b' empty
						putexcel C`row' = "N/A"
			}
					
		else {
			putexcel C`row' = "`b' (`lci'-`uci')"
		}		
		
	}

* "Prevalence of condition among urban population, weighted %"

	forval n = 1/`=scalar(n_countries)' {
	
		matlist c_prop_`var'_`n'
		matrix results = c_prop_`var'_`n'
			
		local row = `n'+1	
		
		local b = string(results[1,3]*100,"%9.1f")
		di `b'
		local lci = string(results[5,3]*100,"%9.1f")
		local uci = string(results[6,3]*100,"%9.1f")
			
		if results[1,3] == . { //`b' empty
						putexcel D`row' = "N/A"
			}
					
		else {
			putexcel D`row' = "`b' (`lci'-`uci')"
		}		
	}

* Overall rows
		
		putexcel A`=scalar(bottom_row)' = "Overall*"
		
		* "Sample with hypertension, n"
			count if analytic_sample == 1 & `var' == 1
			local sample = r(N)
			putexcel B`=scalar(bottom_row)' = `sample'
			
	* "Prevalence of condition among rural population, weighted %"
			matlist weq_prop_`var'
			matrix results = weq_prop_`var'
						
			local b = string(results[1,4]*100,"%9.1f")
			local lci = string(results[5,4]*100,"%9.1f")
			local uci = string(results[6,4]*100,"%9.1f")
				
			putexcel C`=scalar(bottom_row)' = "`b' (`lci'-`uci')"

	* "Prevalence of condition among urban population, weighted %"
			matlist weq_prop_`var'
			matrix results = weq_prop_`var'
						
			local b = string(results[1,3]*100,"%9.1f")
			local lci = string(results[5,3]*100,"%9.1f")
			local uci = string(results[6,3]*100,"%9.1f")
				
			putexcel D`=scalar(bottom_row)' = "`b' (`lci'-`uci')"
			
	}
	
/*******************************************************************************
Appendix table - Details on missingness by country
*******************************************************************************/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/"
putexcel set "Putexcel tables/Appendix table - Details on missing data by country ${date}.xlsx", replace 

* Macro for bottom row
	scalar bottom_row = `=scalar(n_countries)'+2	
	   	   		   
* Make the table frame
putexcel A1 = "Country"
putexcel B1 = "rural"
putexcel C1 = "cc_dm_test"
putexcel D1 = "cc_dm_diag"
putexcel E1 = "dia_med_new"
putexcel F1 = "hypt_med_new"
putexcel G1 = "statin"
putexcel H1 = "cc_dm_control"
putexcel I1 = "cc_hy_control"
putexcel J1 = "cc_lipid_control"

putexcel A`=scalar(bottom_row)' = "Overall"

* Sorting the dataset
	sort country_id country 

* "Country"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	putexcel A`row' = country[`n']
	}

* "rural"
	forval n = 1/`=scalar(n_countries)' {
	local row = `n'+1	
	
	count if analytic_sample == 1 & rural == . & country_encoded == `n'
	local missing = r(N)
	
	count if analytic_sample == 1 & country_encoded == `n'
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
	
	putexcel B`row' = `missing_percent_string'
	}	

* "cc_dm_test"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n' & ///
			(cc_dm_test == .)
		local missing = r(N)
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n'
		local total = r(N)
		
		local missing_percent = `missing'/`total'*100
		local missing_percent_string = string(`missing_percent',"%5.1f")
		
		if `missing_percent'== . | `missing_percent' == 100 {
			putexcel C`row' = "N/A"
		}
		
		else {
			putexcel C`row' = `missing_percent_string'
		}
	}	
	
* "cc_dm_diag"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n' & ///
			(cc_dm_diag == .)
		local missing = r(N)
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n'
		local total = r(N)
		
		local missing_percent = `missing'/`total'*100
		local missing_percent_string = string(`missing_percent',"%5.1f")
		
		if `missing_percent'== . | `missing_percent' == 100 {
			putexcel D`row' = "N/A"
		}
		
		else {
			putexcel D`row' = `missing_percent_string'
		}
	}
	
* "dia_med_new"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n' & ///
			(dia_med_new == .)
		local missing = r(N)
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n'
		local total = r(N)
		
		local missing_percent = `missing'/`total'*100
		local missing_percent_string = string(`missing_percent',"%5.1f")
		
		if `missing_percent'== . | `missing_percent' == 100 {
			putexcel E`row' = "N/A"
		}
		
		else {
			putexcel E`row' = `missing_percent_string'
		}
	}	
	
* "hypt_med_new"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n' & ///
			(hypt_med_new == .)
		local missing = r(N)
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n'
		local total = r(N)
		
		local missing_percent = `missing'/`total'*100
		local missing_percent_string = string(`missing_percent',"%5.1f")
		
		if `missing_percent'== . | `missing_percent' == 100 {
			putexcel F`row' = "N/A"
		}
		
		else {
			putexcel F`row' = `missing_percent_string'
		}
	}
	
* "statin"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n' & ///
			(statin == .)
		local missing = r(N)
		
		count if analytic_sample == 1 & clin_dia == 1 & country_encoded == `n'
		local total = r(N)
		
		local missing_percent = `missing'/`total'*100
		local missing_percent_string = string(`missing_percent',"%5.1f")
		
		if `missing_percent'== . | `missing_percent' == 100 {
			putexcel G`row' = "N/A"
		}
		
		else {
			putexcel G`row' = `missing_percent_string'
		}
	}
	
* "cc_dm_control"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		
		count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & country_encoded == `n' & ///
			(cc_dm_control == .)
		local missing = r(N)
		
		count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & country_encoded == `n'
		local total = r(N)
		
		local missing_percent = `missing'/`total'*100
		local missing_percent_string = string(`missing_percent',"%5.1f")
		
		if `missing_percent'== . | `missing_percent' == 100 {
			putexcel H`row' = "N/A"
		}
		
		else {
			putexcel H`row' = `missing_percent_string'
		}
	}
	
* "cc_hy_control"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		
		count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & country_encoded == `n' & ///
			(cc_hy_control == .)
		local missing = r(N)
		
		count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & country_encoded == `n'
		local total = r(N)
		
		local missing_percent = `missing'/`total'*100
		local missing_percent_string = string(`missing_percent',"%5.1f")
		
		if `missing_percent'== . | `missing_percent' == 100 {
			putexcel I`row' = "N/A"
		}
		
		else {
			putexcel I`row' = `missing_percent_string'
		}
	}
	
* "cc_lipid_control"
	forval n = 1/`=scalar(n_countries)' {
		local row = `n'+1	
		
		count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & country_encoded == `n' & ///
			(cc_lipid_control == .)
		local missing = r(N)
		
		count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & country_encoded == `n'
		local total = r(N)
		
		local missing_percent = `missing'/`total'*100
		local missing_percent_string = string(`missing_percent',"%5.1f")
		
		if `missing_percent'== . | `missing_percent' == 100 {
			putexcel J`row' = "N/A"
		}
		
		else {
			putexcel J`row' = `missing_percent_string'
		}
	}
	
* Overall
		
	* Rural
	count if analytic_sample == 1 & rural == .
	local missing = r(N)
	
	count if analytic_sample == 1
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
	
	putexcel B`=scalar(bottom_row)' = `missing_percent_string'
	
	* cc_dm_test
	egen nonmissing_country = total(cc_dm_test), by(country)
	list country cc_dm_test if country_id == 1
	
	count if analytic_sample == 1 & clin_dia == 1 & cc_dm_test == . & nonmissing_country != 0
	local missing = r(N)
	
	count if analytic_sample == 1 & clin_dia == 1 & nonmissing_country != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
		
	drop nonmissing_country
	
	putexcel C`=scalar(bottom_row)' = `missing_percent_string'
	
	* cc_dm_diag
	egen nonmissing_country = total(cc_dm_diag), by(country)
	list country cc_dm_diag if country_id == 1
	
	count if analytic_sample == 1 & clin_dia == 1 & cc_dm_diag == . & nonmissing_country != 0
	local missing = r(N)
	
	count if analytic_sample == 1 & clin_dia == 1 & nonmissing_country != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
		
	drop nonmissing_country
	
	putexcel D`=scalar(bottom_row)' = `missing_percent_string'
		
	* dia_med_new
	egen nonmissing_country = total(dia_med_new), by(country)
	list country dia_med_new if country_id == 1
	
	count if analytic_sample == 1 & clin_dia == 1 & dia_med_new == . & nonmissing_country != 0
	local missing = r(N)
	
	count if analytic_sample == 1 & clin_dia == 1 & nonmissing_country != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
		
	drop nonmissing_country
	
	putexcel E`=scalar(bottom_row)' = `missing_percent_string'
	
	* hypt_med_new
	egen nonmissing_country = total(hypt_med_new), by(country)
	list country hypt_med_new if country_id == 1
	
	count if analytic_sample == 1 & clin_dia == 1 & hypt_med_new == . & nonmissing_country != 0
	local missing = r(N)
	
	count if analytic_sample == 1 & clin_dia == 1 & nonmissing_country != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
		
	drop nonmissing_country
	
	putexcel F`=scalar(bottom_row)' = `missing_percent_string'
	
	* statin
	egen nonmissing_country = total(statin), by(country)
	list country statin if country_id == 1
	
	count if analytic_sample == 1 & clin_dia == 1 & statin == . & nonmissing_country != 0
	local missing = r(N)
	
	count if analytic_sample == 1 & clin_dia == 1 & nonmissing_country != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
		
	drop nonmissing_country
	
	putexcel G`=scalar(bottom_row)' = `missing_percent_string'
	
	* cc_dm_control
	egen nonmissing_country = total(cc_dm_control), by(country)
	list country cc_dm_control if country_id == 1
	
	count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & cc_dm_control == . & nonmissing_country != 0
	local missing = r(N)
	
	count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & nonmissing_country != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
		
	drop nonmissing_country
	
	putexcel H`=scalar(bottom_row)' = `missing_percent_string'
	
	* cc_hy_control
	egen nonmissing_country = total(cc_hy_control), by(country)
	list country cc_hy_control if country_id == 1
	
	count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & cc_hy_control == . & nonmissing_country != 0
	local missing = r(N)
	
	count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & nonmissing_country != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
		
	drop nonmissing_country
	
	putexcel I`=scalar(bottom_row)' = `missing_percent_string'
	
	* cc_lipid_control
	egen nonmissing_country = total(cc_lipid_control), by(country)
	list country cc_lipid_control if country_id == 1
	
	count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & cc_lipid_control == . & nonmissing_country != 0
	local missing = r(N)
	
	count if analytic_sample == 1 & clin_dia == 1 & hbg_new == 1 & nonmissing_country != 0
	local total = r(N)
	
	local missing_percent = `missing'/`total'*100
	local missing_percent_string = string(`missing_percent',"%5.1f")
		
	drop nonmissing_country
	
	putexcel J`=scalar(bottom_row)' = `missing_percent_string'
